// Scintilla source code edit control
/** @file LexRuby.cxx
 ** Lexer for Ruby.
 **/
// Copyright 2001- by Clemens Wyss <wys@helbling.ch>
// The License.txt file describes the conditions under which this software may be distributed.

#include <cassert>
#include <cstring>

#include <string>
#include <string_view>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "StringUtils.h"
#include "LexerModule.h"

using namespace Lexilla;

namespace {

//KeywordIndex++Autogenerated -- start of section automatically generated
enum {
	// Indexes into the keyword lists handed to the lexer; ClassifyWordRb()
	// looks words up with keywordLists[KeywordIndex_...].InList(word).
	// NOTE(review): index 4 is not assigned in this enum -- presumably unused
	// or reserved by the generator; confirm before adding a new list.
	KeywordIndex_Keyword = 0,
	KeywordIndex_CodeFolding = 1,
	KeywordIndex_Regex = 2,
	KeywordIndex_PredefinedConstant = 3,
	KeywordIndex_Module = 5,
	KeywordIndex_Class = 6,
	KeywordIndex_BuiltinFunction = 7,
};
//KeywordIndex--Autogenerated -- end of section automatically generated

// True when the top bit of the byte is set, i.e. the byte is outside 7-bit ASCII.
// Kept as a named helper purely for readability at the call sites.
constexpr bool isHighBitChar(char ch) noexcept {
	return (static_cast<unsigned char>(ch) & 0x80U) != 0;
}

// True for '_' and for every character accepted by IsAlpha().
constexpr bool isSafeAlpha(char ch) noexcept {
	return ch == '_' || IsAlpha(ch);
}

// True for a letter, '_' or any byte with the high bit set.
constexpr bool isSafeAlphaOrHigh(char ch) noexcept {
	return isSafeAlpha(ch) || isHighBitChar(ch);
}

// True for '_' and for every character accepted by IsAlphaNumeric().
constexpr bool isSafeAlnum(char ch) noexcept {
	return ch == '_' || IsAlphaNumeric(ch);
}

// True for a letter, digit, '_' or any byte with the high bit set.
constexpr bool isSafeAlnumOrHigh(char ch) noexcept {
	return isSafeAlnum(ch) || isHighBitChar(ch);
}

// Thin wrapper over IsADigit(); the lexer's main loop uses it to detect the
// first character of a numeric literal.
constexpr bool isSafeDigit(char ch) noexcept {
	return IsADigit(ch);
}

// Word-character test used when scanning identifiers, symbols and variables.
constexpr bool isSafeWordcharOrHigh(char ch) noexcept {
	// Unlike scintilla's KeyWords.h, '.' is NOT a word character here:
	// a receiver and the method called on it must be lexed as
	// separate words.
	return isSafeAlnum(ch) || isHighBitChar(ch);
}

// True when @p ch is one of the characters this lexer accepts after a
// backslash as a single-character escape: \\ \a \b \e \f \n \r \s \t \v.
constexpr bool isEscapeSequence(char ch) noexcept {
	switch (ch) {
	case '\\':
	case 'a':
	case 'b':
	case 'e':
	case 'f':
	case 'n':
	case 'r':
	case 's':
	case 't':
	case 'v':
		return true;
	default:
		return false;
	}
}

// Decides whether a '?' begins a character literal, given the two characters
// that follow it.
constexpr bool isQestionMarkChar(char chNext, char chNext2) noexcept {
	if (!isSafeAlnum(chNext)) {
		// multibyte character, escape sequence or punctuation:
		// anything except white space qualifies
		return !IsASpace(chNext);
	}
	// an ASCII word character qualifies only when it stands alone,
	// i.e. is not the beginning of a longer word
	return !isSafeWordcharOrHigh(chNext2);
}

// Longest word (in bytes, excluding the terminating NUL) that is captured for
// keyword lookup; word buffers are declared as char[MAX_KEYWORD_LENGTH + 1].
#define MAX_KEYWORD_LENGTH 127

// Walks backwards from @p pos over already-styled text and reports whether the
// nearest thing that is not a default-styled space or tab is a '.' operator.
// Position 0 itself is never inspected.
bool followsDot(Sci_PositionU pos, LexAccessor &styler) {
	styler.Flush();
	while (pos >= 1) {
		const int style = styler.StyleAt(pos);
		if (style == SCE_RB_OPERATOR) {
			// first operator met decides the answer
			return styler[pos] == '.';
		}
		if (style != SCE_RB_DEFAULT) {
			return false;
		}
		const char ch = styler[pos];
		if (ch != ' ' && ch != '\t') {
			return false;
		}
		// default-styled blank: keep looking further left
		--pos;
	}
	return false;
}

// Forward declarations
// ClassifyWordRb() demotes a keyword to SCE_RB_WORD_DEMOTED when both
// keywordIsAmbiguous(word) and keywordIsModifier(word, pos, styler) are true.
// NOTE(review): the definitions are not visible in this part of the file;
// confirm the exact contracts (and the use of keywordDoStartsLoop) there.
bool keywordIsAmbiguous(const char *prevWord) noexcept;
bool keywordDoStartsLoop(Sci_Position pos, LexAccessor &styler);
bool keywordIsModifier(const char *word, Sci_Position pos, LexAccessor &styler);

// True for every style that ClassifyWordRb() may assign to a plain
// (non-keyword, non-definition) identifier.
constexpr bool IsIdentifierStyle(int style) noexcept {
	switch (style) {
	case SCE_RB_IDENTIFIER:
	case SCE_RB_LIKE_MODULE:
	case SCE_RB_LIKE_CLASS:
	case SCE_RB_BUILTIN_CONSTANT:
	case SCE_RB_BUILTIN_FUNCTION:
		return true;
	default:
		return false;
	}
}

// Classifies the word that starts at @p start and colours it up to @p end.
//
// @param start, end   bounds of the word; at the visible call site @p end is the
//                     position of @p ch, the first character after the word
// @param ch, chNext   the character following the word and the one after it
// @param keywordLists keyword lists indexed by KeywordIndex_*
// @param styler       accessor used to read the word and apply the style
// @param prevWord     in/out: the previous significant keyword ("class",
//                     "module", "def", ...). Set to the current word when it is a
//                     full keyword, left untouched for the receiver in
//                     "def receiver.name", and cleared in every other case.
// @return the classification (chAttr). This can differ from the style actually
//         applied: for the receiver part of "def self.foo" / "def Klass::foo"
//         the return value is SCE_RB_DEF_NAME while the text is coloured as
//         SCE_RB_WORD_DEMOTED, SCE_RB_LIKE_CLASS or SCE_RB_IDENTIFIER.
int ClassifyWordRb(Sci_PositionU start, Sci_PositionU end, char ch, char chNext, LexerWordList keywordLists, LexAccessor &styler, char *prevWord) {
	char s[MAX_KEYWORD_LENGTH + 1];
	styler.GetRange(start, end, s, sizeof(s));
	int chAttr = SCE_RB_IDENTIFIER;
	// SCE_RB_DEFAULT doubles as "no explicit style chosen yet"; see the end of
	// the function where it is replaced by chAttr.
	int style = SCE_RB_DEFAULT;
	if (StrEqual(prevWord, "class")) {
		chAttr = SCE_RB_CLASS_NAME;
	} else if (StrEqual(prevWord, "module")) {
		chAttr = SCE_RB_MODULE_NAME;
	} else if (StrEqual(prevWord, "def")) {
		chAttr = SCE_RB_DEF_NAME;
		// "def receiver.name" or "def Receiver::name": this word is only the
		// receiver. Giving it an explicit style keeps prevWord == "def", so the
		// following word is still classified as the definition name.
		if (ch == '.' || (ch == ':' && chNext == ':')) {
			if (StrEqual(s, "self")) {
				style = SCE_RB_WORD_DEMOTED;
			} else if (IsUpperCase(s[0])) {
				style = SCE_RB_LIKE_CLASS;
			} else {
				style = SCE_RB_IDENTIFIER;
			}
		}
	} else if (keywordLists[KeywordIndex_Keyword].InList(s) && ((start == 0) || !followsDot(start - 1, styler))) {
		if (keywordIsAmbiguous(s) && keywordIsModifier(s, start, styler)) {

			// Demoted keywords are colored as keywords,
			// but do not affect changes in indentation.
			//
			// Consider the word 'if':
			// 1. <<if test ...>> : normal
			// 2. <<stmt if test>> : demoted
			// 3. <<lhs = if ...>> : normal: start a new indent level
			// 4. <<obj.if = 10>> : color as identifier, since it follows '.'

			chAttr = SCE_RB_WORD_DEMOTED;
		} else {
			chAttr = SCE_RB_WORD;
			style = SCE_RB_WORD;
			// Remember the keyword so the next word can be classified relative
			// to it; s and the caller's prevWord buffer have the same capacity.
			strcpy(prevWord, s);
		}
	} else {
		if (IsUpperCase(s[0])) {
			if (keywordLists[KeywordIndex_PredefinedConstant].InList(s)) {
				chAttr = SCE_RB_BUILTIN_CONSTANT;
			} else if ((ch == ':' && chNext == ':') || keywordLists[KeywordIndex_Module].InList(s)) {
				// module::class::name
				chAttr = SCE_RB_LIKE_MODULE;
			} else if ((ch == '.' && chNext == 'n') || keywordLists[KeywordIndex_Class].InList(s)) {
				// Class.new
				// (heuristic: any ".n..." after a capitalised word is taken as ".new")
				chAttr = SCE_RB_LIKE_CLASS;
			}
		} else if (IsLowerCase(s[0]) || s[0] == '_') {
			if (keywordLists[KeywordIndex_BuiltinFunction].InList(s)) {
				chAttr = SCE_RB_BUILTIN_FUNCTION;
			}
		}
		// A still-unclassified identifier directly followed by '(' is a call.
		if (chAttr == SCE_RB_IDENTIFIER && ch == '(') {
			chAttr = SCE_RB_FUNCTION;
		}
	}
	if (style == SCE_RB_DEFAULT) {
		// No branch above picked an explicit style: colour with the
		// classification and forget the previous keyword.
		style = chAttr;
		prevWord[0] = '\0';
	}
	styler.ColorTo(end, style);
	return chAttr;
}

// Tests whether the here-doc terminator sits at @p pos on an otherwise blank
// line prefix: the delimiter must match at @p pos and, scanning backwards,
// only spaces and tabs may appear before an end-of-line character is met.
//
// Precondition: the here-doc target can be indented
// Returns false when the backwards scan reaches position 0 without finding
// an end-of-line character.
bool lookingAtHereDocDelim(LexAccessor &styler, Sci_Position pos, const char *HereDocDelim) noexcept {
	if (!styler.Match(pos, HereDocDelim)) {
		return false;
	}
	for (Sci_Position scan = pos - 1; scan > 0; --scan) {
		const char ch = styler[scan];
		if (IsEOLChar(ch)) {
			return true;
		}
		if (ch != ' ' && ch != '\t') {
			return false;
		}
	}
	return false;
}

// https://docs.ruby-lang.org/en/master/syntax/literals_rdoc.html#label-Percent+Literals
// Maps an opening bracket to the character that closes it; every other
// character terminates itself.
constexpr char opposite(char ch) noexcept {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}

// Null transition: the current state has ended and the current character
// must be lexed again in the default state. Steps the index back by one and
// shifts the look-ahead window so that the next loop iteration sees @p ch again.
void redo_char(Sci_Position &i, char ch, char &chNext, char &chNext2, int &state) noexcept {
	state = SCE_RB_DEFAULT;
	chNext2 = chNext;
	chNext = ch;
	--i;
}

// Consumes one character: moves the index forward and slides the look-ahead
// window by one. chNext2 is taken by value, so the caller's copy is not refreshed.
void advance_char(Sci_Position &i, char &ch, char &chNext, char chNext2) noexcept {
	ch = chNext;
	chNext = chNext2;
	++i;
}

// precondition: startPos points to one after the EOL char
//
// Scans backwards from startPos - 1 towards the start of the line looking for
// a character styled SCE_RB_HERE_DELIM. On success startPos is moved to that
// character and true is returned; otherwise startPos is left unchanged.
bool currLineContainsHereDelims(Sci_Position &startPos, LexAccessor &styler) noexcept {
	if (startPos <= 1) {
		return false;
	}

	Sci_Position scan = startPos - 1;
	while (scan > 0) {
		if (IsEOLChar(styler.SafeGetCharAt(scan))) {
			// Reached the previous EOL: there are no here doc delims
			// on the current line, even if the EOL isn't default style.
			return false;
		}
		if (styler.BufferStyleAt(scan) == SCE_RB_HERE_DELIM) {
			// Update the caller's position so the string isn't re-analyzed
			startPos = scan;
			return true;
		}
		--scan;
	}
	return false;
}

// Tracks the delimiters of the quoted construct currently being lexed.
// Declared at namespace scope because the enter/exit methods of the
// inner-expression stack take it as a parameter.

class QuoteCls {
public:
	int Count = 0;		// nesting depth of the opening delimiter
	char Up = '\0';		// opening delimiter
	char Down = '\0';	// matching closing delimiter
	// Forget any delimiter seen so far.
	void New() noexcept {
		Up = '\0';
		Down = '\0';
		Count = 0;
	}
	// Record @p u as the opening delimiter and derive its closing partner.
	void Open(char u) noexcept {
		Up = u;
		Down = opposite(u);
		++Count;
	}
};

// True when @p state is one of the %-literal string states.
constexpr bool IsPercentLiteral(int state) noexcept {
	switch (state) {
	case SCE_RB_STRING_Q:
	case SCE_RB_STRING_QQ:
	// excluded SCE_RB_STRING_QR
	case SCE_RB_STRING_W:
	case SCE_RB_STRING_QW:
	case SCE_RB_STRING_I:
	case SCE_RB_STRING_QI:
	case SCE_RB_STRING_QS:
	case SCE_RB_STRING_QX:
		return true;
	default:
		return false;
	}
}

// False for the literal states listed below (no interpolation is attempted
// inside them); true for every other state.
constexpr bool IsInterpolableLiteral(int state) noexcept {
	switch (state) {
	case SCE_RB_STRING_Q:
	case SCE_RB_STRING_W:
	case SCE_RB_STRING_I:
	case SCE_RB_STRING_QS:
	case SCE_RB_STRING_SQ:
		return false;
	default:
		return true;
	}
}

// True when @p ch is the punctuation character of a single-character special
// global variable such as $~ or $!.
constexpr bool IsSingleSpecialVariable(char ch) noexcept {
	// https://docs.ruby-lang.org/en/master/globals_rdoc.html
	switch (ch) {
	case '~':
	case '*':
	case '$':
	case '?':
	case '!':
	case '@':
	case '/':
	case '\\':
	case ';':
	case ',':
	case '.':
	case '=':
	case ':':
	case '<':
	case '>':
	case '"':
	case '&':
	case '`':
	case '\'':
	case '+':
		return true;
	default:
		return false;
	}
}

// Colours a variable interpolated into a literal without braces.
// NOTE(review): the call site is outside this part of the file; presumably the
// character at @p i is '#' and chNext is '$' or '@' ("#$var", "#@var", "#@@var")
// -- confirm against the caller.
//
// @param styler  accessor used for colouring and look-ahead
// @param state   style of the enclosing literal (by value; the caller's state
//                is not changed)
// @param i       in: position of the current character; out: pos - 1, where
//                pos is the first position past the variable
// @param ch      out: set to the incoming chNext
// @param chNext  out: set to the character at pos
// @param chNext2 by value, used as scratch for the look-ahead
void InterpolateVariable(LexAccessor &styler, int state, Sci_Position &i, char &ch, char &chNext, char chNext2) {
	Sci_Position pos = i;
	// Close the pending literal segment at i, then colour through i + 1 as operator.
	styler.ColorTo(pos, state);
	styler.ColorTo(pos + 1, SCE_RB_OPERATOR);
	state = SCE_RB_GLOBAL;
	pos += 2;
	// len limits how many characters the scan below may examine:
	// 0 means "no limit" (see the wrap-around note in the loop).
	unsigned len = 0;
	if (chNext == '$') {
		if (chNext2 == '-') {
			// "$-x": after the '-', at most one word character belongs to the name
			++pos;
			len = 2;
		} else if (IsSingleSpecialVariable(chNext2)) {
			// "$~", "$!", ...: the name is exactly that one punctuation character
			++pos;
			len = 1;
		}
	} else {
		state = SCE_RB_INSTANCE_VAR;
		if (chNext2 == '@') {
			state = SCE_RB_CLASS_VAR;
			++pos;
		}
	}
	while (true) {
		chNext2 = styler.SafeGetCharAt(pos);
		// len is unsigned: when it starts at 0 this decrement wraps to UINT_MAX,
		// so the length test below effectively never fires and only the
		// word-character test ends the scan.
		--len;
		if (len == 0 || !isSafeWordcharOrHigh(chNext2)) {
			break;
		}
		++pos;
	}
	styler.ColorTo(pos, state);
	i = pos - 1;
	ch = chNext;
	chNext = chNext2;
}

// This routine looks for false positives like
// undef foo, <<
// There aren't too many.
//
// @param iPrev    points to the start of <<
// @param styler   accessor; styler.Flush() is called before styles are read back
// @param prevWord caller's previous-word buffer, MAX_KEYWORD_LENGTH + 1 bytes.
//                 When the line starts with a keyword-styled word, that word
//                 (truncated to MAX_KEYWORD_LENGTH characters) is written here;
//                 otherwise the buffer is left untouched.
// @return false only when the first word on the line is the keyword
//         'undef', 'def' or 'alias'; true in every other case.

bool sureThisIsHeredoc(Sci_Position iPrev, LexAccessor &styler, char *prevWord) {
	// Not so fast, since Ruby's so dynamic.  Check the context
	// to make sure we're OK.
	const Sci_Line lineStart = styler.GetLine(iPrev);
	const Sci_Position lineStartPosn = styler.LineStart(lineStart);
	styler.Flush();

	// Find the first word after some whitespace
	const Sci_Position firstWordPosn = LexSkipSpaceTab(styler, lineStartPosn, iPrev);
	if (firstWordPosn >= iPrev) {
		// Have something like {^	  <<}
		//XXX Look at the first previous non-comment non-white line
		// to establish the context.  Not too likely though.
		return true;
	}
	const int prevStyle = styler.StyleAt(firstWordPosn);
	if (prevStyle != SCE_RB_WORD && prevStyle != SCE_RB_WORD_DEMOTED) {
		// The line does not start with a keyword
		return true;
	}

	// Copy the keyword-styled run into prevWord. The copy is bounded by the
	// buffer capacity: a run longer than MAX_KEYWORD_LENGTH cannot equal any
	// of the short keywords tested below, so truncating it does not change
	// the result but does prevent writing past the end of the buffer.
	char *dst = prevWord;
	const char * const dstEnd = prevWord + MAX_KEYWORD_LENGTH;
	for (Sci_Position wordPosn = firstWordPosn;
		wordPosn < iPrev && dst < dstEnd && styler.StyleAt(wordPosn) == prevStyle;
		wordPosn += 1) {
		*dst++ = styler[wordPosn];
	}
	*dst = '\0';

	//XXX Write a style-aware thing to regex scintilla buffer objects
	// These keywords are what we were looking for: after them '<<' is
	// not the start of a here-doc.
	return !StrEqualsAny(prevWord, "undef", "def", "alias");
}

// Compares the here-doc target text with the document text at @p currPos
// in place, so no buffer has to be allocated for the target.
// targetEndPos points one past the end of the current target.
// Returns false when fewer characters remain in the document than the target
// is long, or when any character differs.
bool haveTargetMatch(Sci_Position currPos, Sci_Position lengthDoc, Sci_Position targetStartPos, Sci_Position targetEndPos, LexAccessor &styler) noexcept {
	const Sci_Position targetLength = targetEndPos - targetStartPos;
	if (lengthDoc - currPos < targetLength) {
		return false;
	}
	for (Sci_Position offset = 0; offset < targetLength && currPos + offset < lengthDoc; ++offset) {
		if (styler[targetStartPos + offset] != styler[currPos + offset]) {
			return false;
		}
	}
	return true;
}

// Finds the start position of the expression containing @p pos
// @p min_pos should be a known expression start, e.g. the start of the line
//
// Scans backwards over operator-styled characters, skipping balanced bracket
// pairs. The scan stops just after an unmatched opening bracket or a
// top-level ';', or at @p min_pos when neither is found.
Sci_Position findExpressionStart(Sci_Position pos, Sci_Position min_pos, LexAccessor &styler) noexcept {
	int nesting = 0;
	while (pos > min_pos) {
		const Sci_Position prev = pos - 1;
		if (styler.StyleAt(prev) == SCE_RB_OPERATOR) {
			switch (styler[prev]) {
			case '}':
			case ')':
			case ']':
				++nesting;
				break;

			case '{':
			case '(':
			case '[':
				if (nesting == 0) {
					return pos;
				}
				--nesting;
				break;

			case ';':
				if (nesting == 0) {
					return pos;
				}
				break;

			default:
				break;
			}
		}
		--pos;
	}
	return pos;
}

// We need a check because the form
// [identifier] <<[target]
// is ambiguous.  The Ruby lexer/parser resolves it by
// looking to see if [identifier] names a variable or a
// function.  If it's a function, it's the start of a here-doc.
// If it's a var, it's an operator.  This lexer doesn't
// maintain a symbol table, so it looks ahead to see what's
// going on, in cases where we have
// ^[white-space]*[identifier([.|::]identifier)*][white-space]*<<[target]
//
// If there's no occurrence of [target] on a line, assume we don't.

// return true == yes, we have no heredocs

// Look-ahead heuristic for the form "[identifier] <<[target]".
// @param lt2StartPos position of the first '<' of "<<"
// @param styler      accessor; the whole document is examined, and
//                    styler.Flush() is called before styles are read back
// @return true  when "<<" should be treated as an ordinary operator,
//         false when it looks like the start of a here-doc (a following line,
//         within the next 50, begins with the target text)
bool sureThisIsNotHeredoc(Sci_Position lt2StartPos, LexAccessor &styler) {
	// Use full document, not just part we're styling
	const Sci_Position lengthDoc = styler.Length();
	const Sci_Line lineStart = styler.GetLine(lt2StartPos);
	const Sci_Position lineStartPosn = styler.LineStart(lineStart);
	styler.Flush();
	// Named return values: the function name is a negative, so spell out
	// what true and false mean at each return.
	constexpr bool definitely_not_a_here_doc = true;
	constexpr bool looks_like_a_here_doc = false;

	// find the expression start rather than the line start
	const Sci_Position exprStartPosn = findExpressionStart(lt2StartPos, lineStartPosn, styler);

	// Find the first word after some whitespace
	Sci_Position firstWordPosn = LexSkipWhiteSpace(styler, exprStartPosn, lt2StartPos);
	if (firstWordPosn >= lt2StartPos) {
		return definitely_not_a_here_doc;
	}
	int prevStyle = styler.StyleAt(firstWordPosn);
	// If we have '<<' following a keyword, it's not a heredoc
	if (prevStyle != SCE_RB_IDENTIFIER
		&& prevStyle != SCE_RB_GLOBAL 		// $stdout and $stderr
		&& prevStyle != SCE_RB_BUILTIN_FUNCTION
		&& prevStyle != SCE_RB_LIKE_MODULE	// module::method
		&& prevStyle != SCE_RB_LIKE_CLASS	// class::method
		&& prevStyle != SCE_RB_INSTANCE_VAR
		&& prevStyle != SCE_RB_CLASS_VAR) {
		return definitely_not_a_here_doc;
	}
	int newStyle = prevStyle;
	// Some compilers incorrectly warn about uninit newStyle
	// Walk the receiver chain: name ( ('.' | '::') name )*
	for (firstWordPosn += 1; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
		// Inner loop looks at the name
		for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
			newStyle = styler.StyleAt(firstWordPosn);
			if (newStyle != prevStyle) {
				break;
			}
		}
		// Do we have '::' or '.'?
		if (firstWordPosn < lt2StartPos && newStyle == SCE_RB_OPERATOR) {
			const char ch = styler[firstWordPosn];
			if (ch == '.') {
				// yes
			} else if (ch == ':') {
				// a single ':' is not a scope operator; require the second one
				if (styler.StyleAt(++firstWordPosn) != SCE_RB_OPERATOR) {
					return definitely_not_a_here_doc;
				} else if (styler[firstWordPosn] != ':') {
					return definitely_not_a_here_doc;
				}
			} else {
				break;
			}
		} else {
			break;
		}
		// on second and next passes, only identifiers may appear since
		// class and instance variable are private
		prevStyle = SCE_RB_IDENTIFIER;
	}
	// Skip next batch of white-space
	firstWordPosn = LexSkipSpaceTab(styler, firstWordPosn, lt2StartPos);
	// possible symbol for an implicit hash argument
	if (firstWordPosn < lt2StartPos && styler.StyleAt(firstWordPosn) == SCE_RB_SYMBOL) {
		for (; firstWordPosn <= lt2StartPos; firstWordPosn += 1) {
			if (styler.StyleAt(firstWordPosn) != SCE_RB_SYMBOL) {
				break;
			}
		}
		// Skip next batch of white-space
		firstWordPosn = LexSkipWhiteSpace(styler, firstWordPosn, lt2StartPos);
	}
	if (firstWordPosn != lt2StartPos) {
		// Have [[^ws[identifier]ws[*something_else*]ws<<
		return definitely_not_a_here_doc;
	}
	// OK, now 'j' will point to the current spot moving ahead
	// (it starts on the second '<' of "<<")
	Sci_Position j = firstWordPosn + 1;
	if (styler.StyleAt(j) != SCE_RB_OPERATOR || styler[j] != '<') {
		// This shouldn't happen
		return definitely_not_a_here_doc;
	}
	// A here-doc needs at least one following line for its body.
	const Sci_Position nextLineStartPosn = styler.LineStart(lineStart + 1);
	if (nextLineStartPosn >= lengthDoc) {
		return definitely_not_a_here_doc;
	}
	j = LexSkipSpaceTab(styler, j + 1, nextLineStartPosn);
	if (j >= lengthDoc) {
		return definitely_not_a_here_doc;
	}
	bool allow_indent;
	Sci_Position target_start;
	Sci_Position target_end;
	// From this point on no more styling, since we're looking ahead
	// "<<-" and "<<~" permit the terminator to be indented.
	if (styler[j] == '-' || styler[j] == '~') {
		allow_indent = true;
		j++;
	} else {
		allow_indent = false;
	}

	// Allow for quoted targets.
	char target_quote = 0;
	switch (styler[j]) {
	case '\'':
	case '"':
	case '`':
		target_quote = styler[j];
		j += 1;
	}

	if (isSafeAlnumOrHigh(styler[j])) {
		// Init target_end because some compilers think it won't
		// be initialized by the time it's used
		target_start = target_end = j;
		j++;
	} else {
		return definitely_not_a_here_doc;
	}
	// Find the end of the target and check what follows it on the line.
	for (; j < lengthDoc; j++) {
		if (!isSafeAlnumOrHigh(styler[j])) {
			if (target_quote && styler[j] != target_quote) {
				// unquoted end
				return definitely_not_a_here_doc;
			}

			// And for now make sure that it's a newline
			// don't handle arbitrary expressions yet

			target_end = j;
			if (target_quote) {
				// Now we can move to the character after the string delimiter.
				j += 1;
			}
			j = LexSkipSpaceTab(styler, j, lengthDoc);
			if (j >= lengthDoc) {
				return definitely_not_a_here_doc;
			} else {
				const char ch = styler[j];
				// Accepted after the target: a comment, the end of the line,
				// '.', ',' or a lower-case letter.
				if (ch == '#' || IsEOLChar(ch) || ch == '.' || ch == ',' || IsLowerCase(ch)) {
					// This is OK, so break and continue;
					break;
				} else {
					return definitely_not_a_here_doc;
				}
			}
		}
	}

	// Just look at the start of each line
	Sci_Line last_line = styler.GetLine(lengthDoc - 1);
	// But don't go too far
	if (last_line > lineStart + 50) {
		last_line = lineStart + 50;
	}
	for (Sci_Line line_num = lineStart + 1; line_num <= last_line; line_num++) {
		j = styler.LineStart(line_num);
		if (allow_indent) {
			j = LexSkipSpaceTab(styler, j, lengthDoc);
		}
		// target_end is one past the end
		// Only a prefix comparison is made: a line that merely begins with
		// the target text counts as a match.
		if (haveTargetMatch(j, lengthDoc, target_start, target_end, styler)) {
			// We got it
			return looks_like_a_here_doc;
		}
	}
	return definitely_not_a_here_doc;
}

//todo: if we aren't looking at a stdio character,
// move to the start of the first line that is not in a
// multi-line construct

// Moves the lexing start back to the beginning of a line from which lexing
// can safely restart in the default state.
//
// Starting at the line containing startPos, the routine steps back one line at
// a time for as long as the previous line ends in a backslash continuation,
// ends in a non-default style, contains a here-doc delimiter, or (only when
// skipWhiteSpace is set) is empty.
//
// @param startPos       in/out: replaced by the start of the chosen line
// @param length         in/out: grown by the distance startPos moved back
// @param initStyle      out: always set to SCE_RB_DEFAULT
// @param styler         accessor used to read characters and styles
// @param skipWhiteSpace also step back over empty lines
void synchronizeDocStart(Sci_PositionU & startPos, Sci_Position &length, int &initStyle, LexAccessor &styler, bool skipWhiteSpace = false) {
#if 0
	styler.Flush();
	const int style = styler.StyleAt(startPos);
	switch (style) {
	case SCE_RB_STDIN:
	case SCE_RB_STDOUT:
	case SCE_RB_STDERR:
		// Don't do anything else with these.
		return;
	}
#endif

	Sci_Position pos = startPos;
	// Quick way to characterize each line
	Sci_Line lineStart;
	for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
		// Now look at the style before the previous line's EOL
		// (pos is the last character of the previous line)
		pos = styler.LineStart(lineStart) - 1;
		if (pos <= 10) {
			// Close enough to the top of the document: restart from line 0
			lineStart = 0;
			break;
		}
		const char ch = styler.SafeGetCharAt(pos);
		const char chPrev = styler.SafeGetCharAt(pos - 1);
		if (ch == '\n' && chPrev == '\r') {
			// CRLF: step onto the '\r' so pos - 1 is the last visible character
			pos--;
		}
		if (styler.SafeGetCharAt(pos - 1) == '\\') {
			// Continuation line -- keep going
		} else if (styler.StyleAt(pos) != SCE_RB_DEFAULT) {
			// Part of multi-line construct -- keep going
		} else if (currLineContainsHereDelims(pos, styler)) {
			// Keep going: the previous line opens a here-doc.
			// (The call may move pos to the delimiter, but pos is
			// recomputed at the top of the next iteration.)
		} else if (skipWhiteSpace && IsLexEmptyLine(styler, lineStart - 1)) {
			// Keep going
		} else {
			break;
		}
	}
	pos = styler.LineStart(lineStart);
	// startPos is unsigned and pos is signed; pos <= startPos here, so the
	// difference is the non-negative number of characters stepped back.
	length += (startPos - pos);
	startPos = pos;
	initStyle = SCE_RB_DEFAULT;
}

void ColouriseRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, LexerWordList keywordLists, Accessor &styler) {
	// Lexer for Ruby often has to backtrack to start of current style to determine
	// which characters are being used as quotes, how deeply nested is the
	// start position and what the termination string is for here documents

	class HereDocCls {
	public:
		uint8_t State = 0;
		// States
		// 0: '<<' encountered
		// 1: collect the delimiter
		// 1b: text between the end of the delimiter and the EOL
		// 2: here doc text (lines after the delimiter)
		char Quote = '\0';		// the char after '<<'
		bool Quoted = false;		// true if Quote in ('\'','"','`')
		bool CanBeIndented = false;
		unsigned DelimiterLength = 0;	// strlen(Delimiter)
		char Delimiter[256] {};	// the Delimiter, limit of 256: from Perl
	};
	HereDocCls HereDoc;

	QuoteCls Quote;

	synchronizeDocStart(startPos, length, initStyle, styler, false);
	const Sci_Position lengthDoc = startPos + length;

	bool preferRE = true;
	bool afterDef = false;
	int state = initStyle;
	char prevWord[MAX_KEYWORD_LENGTH + 1] {}; // 1 byte for zero

	char chPrev = styler.SafeGetCharAt(startPos - 1);
	char chNext = styler.SafeGetCharAt(startPos);
	bool is_real_number = true;	  // Differentiate between constants and ?-sequences.
	styler.StartAt(startPos);
	styler.StartSegment(startPos);

	constexpr uint64_t q_states = SCE_RB_STRING_Q
							 | (static_cast<uint64_t>(SCE_RB_STRING_QQ) << 6)
							 | (static_cast<uint64_t>(SCE_RB_STRING_QR) << 12)
							 | (static_cast<uint64_t>(SCE_RB_STRING_W) << 18)
							 | (static_cast<uint64_t>(SCE_RB_STRING_QW) << 24)
							 | (static_cast<uint64_t>(SCE_RB_STRING_QX) << 30)
							 | (static_cast<uint64_t>(SCE_RB_STRING_I) << 36)
							 | (static_cast<uint64_t>(SCE_RB_STRING_QI) << 42)
							 | (static_cast<uint64_t>(SCE_RB_STRING_QS) << 48);
	constexpr const char* q_chars = "qQrwWxiIs";

	// In most cases a value of 2 should be ample for the code in the
	// Ruby library, and the code the user is likely to enter.
	// For example,
	// fu_output_message "mkdir #{options[:mode] ? ('-m %03o ' % options[:mode]) : ''}#{list.join ' '}"
	//	   if options[:verbose]
	// from fileutils.rb nests to a level of 2
	// If the user actually hits a 6th occurrence of '#{' in a double-quoted
	// string (including regex'es, %Q, %<sym>, %w, and other strings
	// that interpolate), it will stay as a string.	 The problem with this
	// is that quotes might flip, a 7th '#{' will look like a comment,
	// and code-folding might be wrong.

	// If anyone runs into this problem, I recommend raising this
	// value slightly higher to replacing the fixed array with a linked
	// list.  Keep in mind this code will be called every time the lexer
	// is invoked.

#define INNER_STRINGS_MAX_COUNT 5
	class InnerExpression {
		// These vars track our instances of "...#{,,,%Q<..#{,,,}...>,,,}..."
		int inner_string_types[INNER_STRINGS_MAX_COUNT] {};
		// Track # braces when we push a new #{ thing
		int inner_expn_brace_counts[INNER_STRINGS_MAX_COUNT] {};
		QuoteCls inner_quotes[INNER_STRINGS_MAX_COUNT];
		int inner_string_count = 0;

	public:
		int brace_counts = 0;	// Number of #{ ... } things within an expression

		bool canEnter() const noexcept {
			return inner_string_count < INNER_STRINGS_MAX_COUNT;
		}
		bool canExit() const noexcept {
			return inner_string_count > 0;
		}
		void enter(int &state, const QuoteCls &curr_quote) noexcept {
			inner_string_types[inner_string_count] = state;
			state = SCE_RB_DEFAULT;
			inner_expn_brace_counts[inner_string_count] = brace_counts;
			brace_counts = 0;
			inner_quotes[inner_string_count] = curr_quote;
			++inner_string_count;
		}
		void exit(int &state, QuoteCls &curr_quote) noexcept {
			--inner_string_count;
			state = inner_string_types[inner_string_count];
			brace_counts = inner_expn_brace_counts[inner_string_count];
			curr_quote = inner_quotes[inner_string_count];
		}
	};
	InnerExpression innerExpr;

	for (Sci_Position i = startPos; i < lengthDoc; i++) {
		char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		char chNext2 = styler.SafeGetCharAt(i + 2);

		if (styler.IsLeadByte(ch)) {
			chNext = chNext2;
			chPrev = ' ';
			i += 1;
			continue;
		}

		// skip on DOS/Windows
		//No, don't, because some things will get tagged on,
		// so we won't recognize keywords, for example
#if 0
		if (ch == '\r' && chNext == '\n') {
			continue;
		}
#endif

		if (HereDoc.State == 1 && IsEOLChar(ch)) {
			// Begin of here-doc (the line after the here-doc delimiter):
			HereDoc.State = 2;
			if (state == SCE_RB_WORD) {
				const Sci_Position wordStartPos = styler.GetStartSegment();
				ClassifyWordRb(wordStartPos, i, ch, chNext, keywordLists, styler, prevWord);
			} else {
				styler.ColorTo(i, state);
			}
			// Don't check for a missing quote, just jump into
			// the here-doc state
			state = SCE_RB_HERE_QQ;
			if (HereDoc.Quoted) {
				if (HereDoc.Quote == '\'') {
					state = SCE_RB_HERE_Q;
				} else if (HereDoc.Quote == '`') {
					state = SCE_RB_HERE_QX;
				}
			}
		}

		// Regular transitions
		if (state == SCE_RB_DEFAULT) {
			if (isSafeDigit(ch)) {
				styler.ColorTo(i, state);
				state = SCE_RB_NUMBER;
				is_real_number = true;
			} else if (isSafeAlphaOrHigh(ch)) {
				styler.ColorTo(i, state);
				state = SCE_RB_WORD;
			} else if (ch == '#') {
				styler.ColorTo(i, state);
				state = SCE_RB_COMMENTLINE;
			} else if (ch == '=') {
				// =begin indicates the start of a comment (doc) block
				if ((i == 0 || IsEOLChar(chPrev))
					&& chNext == 'b'
					&& styler.SafeGetCharAt(i + 2) == 'e'
					&& styler.SafeGetCharAt(i + 3) == 'g'
					&& styler.SafeGetCharAt(i + 4) == 'i'
					&& styler.SafeGetCharAt(i + 5) == 'n'
					&& !isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 6))) {
					styler.ColorTo(i, state);
					state = SCE_RB_POD;
				} else {
					styler.ColorTo(i, state);
					styler.ColorTo(i + 1, SCE_RB_OPERATOR);
					preferRE = true;
				}
			} else if (ch == '"') {
				styler.ColorTo(i, state);
				state = SCE_RB_STRING_DQ;
				Quote.New();
				Quote.Open(ch);
			} else if (ch == '\'') {
				styler.ColorTo(i, state);
				state = SCE_RB_STRING_SQ;
				Quote.New();
				Quote.Open(ch);
			} else if (ch == '`') {
				styler.ColorTo(i, state);
				state = SCE_RB_BACKTICKS;
				Quote.New();
				Quote.Open(ch);
			} else if (ch == '@') {
				// Instance or class var
				styler.ColorTo(i, state);
				if (chNext == '@') {
					state = SCE_RB_CLASS_VAR;
					advance_char(i, ch, chNext, chNext2); // pass by ref
				} else {
					state = SCE_RB_INSTANCE_VAR;
				}
			} else if (ch == '$') {
				// Check for a builtin global
				styler.ColorTo(i, state);
				// Recognize it bit by bit
				state = SCE_RB_GLOBAL;
			} else if (ch == '/' && preferRE) {
				// Ambiguous operator
				styler.ColorTo(i, state);
				state = SCE_RB_REGEX;
				Quote.New();
				Quote.Open(ch);
			} else if (ch == '<' && chNext == '<' && chNext2 != '=') {
				if (afterDef) {
					afterDef = false;
					prevWord[0] = 0;
				}
				// Recognise the '<<' symbol - either a here document or a binary op
				styler.ColorTo(i, state);
				i++;
				chNext = chNext2;
				styler.ColorTo(i + 1, SCE_RB_OPERATOR);

				if (!(AnyOf(chNext2, '\"', '\'', '`', '-', '~') || isSafeAlphaOrHigh(chNext2))) {
					// It's definitely not a here-doc,
					// based on Ruby's lexer/parser in the
					// heredoc_identifier routine.
					// Nothing else to do.
				} else if (preferRE) {
					if (sureThisIsHeredoc(i - 1, styler, prevWord)) {
						state = SCE_RB_HERE_DELIM;
						HereDoc.State = 0;
					}
					// else leave it in default state
				} else {
					if (sureThisIsNotHeredoc(i - 1, styler)) {
						// leave state as default
						// We don't have all the heuristics Perl has for indications
						// of a here-doc, because '<<' is overloadable and used
						// for so many other classes.
					} else {
						state = SCE_RB_HERE_DELIM;
						HereDoc.State = 0;
					}
				}
				preferRE = (state != SCE_RB_HERE_DELIM);
			} else if (ch == ':') {
				afterDef = false;
				styler.ColorTo(i, state);
				if (chNext == ':') {
					// Mark "::" as an operator, not symbol start
					styler.ColorTo(i + 2, SCE_RB_OPERATOR);
					advance_char(i, ch, chNext, chNext2); // pass by ref
					state = SCE_RB_DEFAULT;
					preferRE = false;
				} else if (isSafeWordcharOrHigh(chNext)) {
					state = SCE_RB_SYMBOL;
				} else if ((chNext == '@' || chNext == '$') &&
						   isSafeWordcharOrHigh(chNext2)) {
					// instance and global variable followed by an identifier
					advance_char(i, ch, chNext, chNext2);
					state = SCE_RB_SYMBOL;
				} else if (((chNext == '@' && chNext2 == '@')  ||
							(chNext == '$' && chNext2 == '-')) &&
						   isSafeWordcharOrHigh(styler.SafeGetCharAt(i + 3))) {
					// class variables and special global variable "$-IDENTCHAR"
					state = SCE_RB_SYMBOL;
					// $-IDENTCHAR doesn't continue past the IDENTCHAR
					if (chNext == '$') {
						styler.ColorTo(i + 4, SCE_RB_SYMBOL);
						state = SCE_RB_DEFAULT;
					}
					i += 3;
					ch = styler.SafeGetCharAt(i);
					chNext = styler.SafeGetCharAt(i + 1);
				} else if (chNext == '$' && IsSingleSpecialVariable(chNext2)) {
					// single-character special global variables
					i += 2;
					ch = chNext2;
					chNext = styler.SafeGetCharAt(i+1);
					styler.ColorTo(i + 1, SCE_RB_SYMBOL);
					state = SCE_RB_DEFAULT;
				} else if (AnyOf(chNext, '[', '*', '!', '~', '+', '-', '*', '/', '%', '=', '<', '>', '&', '^', '|')) {
					// Do the operator analysis in-line, looking ahead
					// Based on the table in pickaxe 2nd ed., page 339
					bool doColoring = true;
					switch (chNext) {
					case '[':
						if (chNext2 == ']') {
							const char ch_tmp = styler.SafeGetCharAt(i + 3);
							if (ch_tmp == '=') {
								i += 3;
								ch = ch_tmp;
								chNext = styler.SafeGetCharAt(i + 1);
							} else {
								i += 2;
								ch = chNext2;
								chNext = ch_tmp;
							}
						} else {
							doColoring = false;
						}
						break;

					case '*':
						if (chNext2 == '*') {
							i += 2;
							ch = chNext2;
							chNext = styler.SafeGetCharAt(i + 1);
						} else {
							advance_char(i, ch, chNext, chNext2);
						}
						break;

					case '!':
						if (chNext2 == '=' || chNext2 == '~') {
							i += 2;
							ch = chNext2;
							chNext = styler.SafeGetCharAt(i + 1);
						} else {
							advance_char(i, ch, chNext, chNext2);
						}
						break;

					case '<':
						if (chNext2 == '<') {
							i += 2;
							ch = chNext2;
							chNext = styler.SafeGetCharAt(i + 1);
						} else if (chNext2 == '=') {
							const char ch_tmp = styler.SafeGetCharAt(i + 3);
							if (ch_tmp == '>') {  // <=> operator
								i += 3;
								ch = ch_tmp;
								chNext = styler.SafeGetCharAt(i + 1);
							} else {
								i += 2;
								ch = chNext2;
								chNext = ch_tmp;
							}
						} else {
							advance_char(i, ch, chNext, chNext2);
						}
						break;

					default:
						// Simple one-character operators
						advance_char(i, ch, chNext, chNext2);
						break;
					}
					if (doColoring) {
						styler.ColorTo(i + 1, SCE_RB_SYMBOL);
						state = SCE_RB_DEFAULT;
					}
				} else if (!preferRE && !IsASpace(chNext)) {
					// Don't color symbol strings (yet)
					// Just color the ":" and color rest as string
					styler.ColorTo(i + 1, SCE_RB_SYMBOL);
					state = SCE_RB_DEFAULT;
				} else {
					styler.ColorTo(i + 1, SCE_RB_OPERATOR);
					state = SCE_RB_DEFAULT;
					preferRE = true;
				}
			} else if (ch == '%' && !afterDef) {
				styler.ColorTo(i, state);
				bool have_string = false;
				const char *hit = strchr(q_chars, chNext);
				if (hit && !isSafeWordcharOrHigh(chNext2)) {
					state = (q_states >> ((hit - q_chars)*6)) & 0x3f;
					Quote.New();
					Quote.Open(chNext2);
					i += 2;
					ch = chNext2;
					chNext = styler.SafeGetCharAt(i + 1);
					have_string = true;
				} else if ((preferRE || (IsAGraphic(chNext) && chNext != '=')) && !isSafeWordcharOrHigh(chNext)) {
					// Ruby doesn't allow high bit chars here,
					// but the editor host might
					state = SCE_RB_STRING_QQ;
					Quote.New();
					Quote.Open(chNext);
					advance_char(i, ch, chNext, chNext2); // pass by ref
					have_string = true;
				}
				if (!have_string) {
					styler.ColorTo(i + 1, SCE_RB_OPERATOR);
					// stay in default
					preferRE = true;
				}
			} else if (ch == '?') {
				afterDef = false;
				styler.ColorTo(i, state);
				if (isHighBitChar(chNext)) {
					preferRE = false;
					Sci_Position width = 1;
					styler.GetCharacterAndWidth(i + 1, &width);
					chNext = styler.SafeGetCharAt(i + 1 + width);
					if (isSafeWordcharOrHigh(chNext)) {
						styler.ColorTo(i + 1, SCE_RB_OPERATOR);
						i += width;
						state = SCE_RB_WORD;
					} else {
						i += width;
						styler.ColorTo(i + 1, SCE_RB_NUMBER);
					}
				} else if (!isQestionMarkChar(chNext, chNext2)) {
					styler.ColorTo(i + 1, SCE_RB_OPERATOR);
					preferRE = chNext <= ' ';
				} else {
					// It's the start of a character code escape sequence
					// Color it as a number.
					state = SCE_RB_NUMBER;
					is_real_number = false;
				}
			} else if (isoperator(ch)) {
				styler.ColorTo(i, state);
				if (afterDef && ch != '.') {
					afterDef = false;
					prevWord[0] = 0;
					if (chNext == '@' && (ch == '+' || ch == '-' || ch == '!')) {
						// unary operator method
						ch = chNext;
						chNext = chNext2;
						i += 1;
					}
				}
				styler.ColorTo(i + 1, SCE_RB_OPERATOR);
				// If we're ending an expression or block,
				// assume it ends an object, and the ambivalent
				// constructs are binary operators
				//
				// So if we don't have one of these chars,
				// we aren't ending an object exp'n, and ops
				// like : << / are unary operators.

				if (ch == '{') {
					++innerExpr.brace_counts;
					preferRE = true;
				} else if (ch == '}' && --innerExpr.brace_counts < 0
					&& innerExpr.canExit()) {
					styler.ColorTo(i + 1, SCE_RB_OPERATOR);
					innerExpr.exit(state, Quote);
				} else {
					preferRE = !AnyOf(ch, ')', '}', ']', '.');
				}
				// Stay in default state
			} else if (IsEOLChar(ch)) {
				afterDef = false;
				// Make sure it's a true line-end, with no backslash
				if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
					&& chPrev != '\\') {
					// Assume we've hit the end of the statement.
					preferRE = true;
				}
			}
			if (afterDef && state != SCE_RB_DEFAULT) {
				afterDef = false;
			}
		} else if (state == SCE_RB_WORD) {
			if (ch == '.' || !isSafeWordcharOrHigh(ch)) {
				// Words include x? in all contexts,
				// and <letters>= after either 'def' or a dot
				// Move along until a complete word is on our left

				// Default accessor treats '.' as word-chars,
				// but we don't for now.

				if (ch == '='
					&& isSafeWordcharOrHigh(chPrev)
					&& (chNext == '(' || IsASpace(chNext))
					&& (StrEqual(prevWord, "def") || followsDot(styler.GetStartSegment(), styler))) {
					// <name>= is a name only when being def'd -- Get it the next time
					// This means that <name>=<name> is always lexed as
					// <name>, (op, =), <name>
				} else if (ch == ':'
					&& isSafeWordcharOrHigh(chPrev)
					&& IsASpace(chNext)) {
					// keyword argument, symbol Hash key
					styler.ColorTo(i + 1, SCE_RB_SYMBOL);
					state = SCE_RB_DEFAULT;
					preferRE = true;
				} else if ((ch == '?' || ch == '!')
					&& isSafeWordcharOrHigh(chPrev)
					&& !isSafeWordcharOrHigh(chNext)) {
					// <name>? is a name -- Get it the next time
					// But <name>?<name> is always lexed as
					// <name>, (op, ?), <name>
					// Same with <name>! to indicate a method that
					// modifies its target
				} else if (IsEOLChar(ch) && styler.Match(i - 7, "__END__")) {
					styler.ColorTo(i + 1, SCE_RB_DATASECTION);
					state = SCE_RB_DATASECTION;
					// No need to handle this state -- we'll just move to the end
					preferRE = false;
				} else {
					const Sci_Position wordStartPos = styler.GetStartSegment();
					const int word_style = ClassifyWordRb(wordStartPos, i, ch, chNext, keywordLists, styler, prevWord);
					preferRE = false;
					switch (word_style) {
					case SCE_RB_WORD:
						afterDef = StrEqual(prevWord, "def");
						preferRE = !IsLowerCase(prevWord[0]) || keywordLists[KeywordIndex_Regex].InList(prevWord);
						break;

					case SCE_RB_WORD_DEMOTED:
					case SCE_RB_DEF_NAME:
					case SCE_RB_BUILTIN_FUNCTION:
						preferRE = true;
						break;

					case SCE_RB_IDENTIFIER:
						preferRE = IsEOLChar(ch);
						break;

					default:
						break;
					}
					if (ch == '.') {
						// We might be redefining an operator-method
						afterDef = word_style == SCE_RB_DEF_NAME;
					}
					// And if it's the first
					redo_char(i, ch, chNext, chNext2, state); // pass by ref
				}
			}
		} else if (state == SCE_RB_NUMBER) {
			if (!is_real_number) {
				if (ch != '\\' || chPrev == '\\') {
					styler.ColorTo(i + 1, state);
					state = SCE_RB_DEFAULT;
					preferRE = false;
				} else if (isEscapeSequence(chNext)) {
					// Terminal escape sequence -- handle it next time
					// Nothing more to do this time through the loop
				} else if (chNext == 'C' || chNext == 'M') {
					if (chNext2 != '-') {
						// \C or \M ends the sequence -- handle it next time
					} else {
						// Move from abc?\C-x
						//				 ^
						// to
						//				   ^
						i += 2;
						ch = chNext2;
						chNext = styler.SafeGetCharAt(i + 1);
					}
				} else if (chNext == 'c') {
					// Stay here, \c is a combining sequence
					advance_char(i, ch, chNext, chNext2); // pass by ref
				} else {
					// ?\x, including ?\\ is final.
					styler.ColorTo(i + 2, state);
					state = SCE_RB_DEFAULT;
					preferRE = false;
					advance_char(i, ch, chNext, chNext2);
				}
			} else if (isSafeAlnumOrHigh(ch) || (ch == '.' && isSafeDigit(chNext))) {
				// Keep going
			} else if (ch == '.' && chNext == '.') {
				styler.ColorTo(i, state);
				redo_char(i, ch, chNext, chNext2, state); // pass by ref
			} else {
				styler.ColorTo(i, state);
				redo_char(i, ch, chNext, chNext2, state); // pass by ref
				preferRE = false;
			}
		} else if (state == SCE_RB_COMMENTLINE) {
			if (IsEOLChar(ch)) {
				styler.ColorTo(i, state);
				state = SCE_RB_DEFAULT;
				// Use whatever setting we had going into the comment
			}
		} else if (state == SCE_RB_HERE_DELIM) {
			// See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
			// Slightly different: if we find an immediate '-',
			// the target can appear indented.

			if (HereDoc.State == 0) { // '<<' encountered
				HereDoc.State = 1;
				HereDoc.DelimiterLength = 0;
				if (ch == '-' || ch == '~') {
					HereDoc.CanBeIndented = true;
					advance_char(i, ch, chNext, chNext2); // pass by ref
				} else {
					HereDoc.CanBeIndented = false;
				}
				if (IsEOLChar(ch)) {
					// Bail out of doing a here doc if there's no target
					state = SCE_RB_DEFAULT;
					preferRE = false;
				} else {
					HereDoc.Quote = ch;

					if (ch == '\'' || ch == '"' || ch == '`') {
						HereDoc.Quoted = true;
						HereDoc.Delimiter[0] = '\0';
					} else {
						HereDoc.Quoted = false;
						HereDoc.Delimiter[0] = ch;
						HereDoc.Delimiter[1] = '\0';
						HereDoc.DelimiterLength = 1;
					}
				}
			} else if (HereDoc.State == 1) { // collect the delimiter
				if (IsEOLChar(ch)) {
					// End the quote now, and go back for more
					styler.ColorTo(i, state);
					state = SCE_RB_DEFAULT;
					i--;
					chNext = ch;
					preferRE = false;
				} else if (HereDoc.Quoted) {
					if (ch == HereDoc.Quote) { // closing quote => end of delimiter
						styler.ColorTo(i + 1, state);
						state = SCE_RB_DEFAULT;
						preferRE = false;
					} else {
						if (ch == '\\' && !IsEOLChar(chNext)) {
							advance_char(i, ch, chNext, chNext2);
						}
						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
					}
				} else { // an unquoted here-doc delimiter
					if (isSafeAlnumOrHigh(ch)) {
						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
					} else {
						styler.ColorTo(i, state);
						redo_char(i, ch, chNext, chNext2, state);
						preferRE = false;
					}
				}
				if (HereDoc.DelimiterLength >= sizeof(HereDoc.Delimiter) - 1) {
					styler.ColorTo(i, state);
					state = SCE_RB_ERROR;
					preferRE = false;
				}
			}
		} else if (state == SCE_RB_HERE_Q || state == SCE_RB_HERE_QQ || state == SCE_RB_HERE_QX) {
			if (ch == '\\' && !IsEOLChar(chNext)) {
				advance_char(i, ch, chNext, chNext2);
			} else if (ch == '#' && state != SCE_RB_HERE_Q
				&& (chNext == '{' || chNext == '@' || chNext == '$')) {
				if (chNext == '{') {
					if (innerExpr.canEnter()) {
						// process #{ ... }
						styler.ColorTo(i, state);
						styler.ColorTo(i + 2, SCE_RB_OPERATOR);
						innerExpr.enter(state, Quote);
						preferRE = true;
						// Skip one
						advance_char(i, ch, chNext, chNext2);
					}
				} else {
					InterpolateVariable(styler, state, i, ch, chNext, chNext2);
				}
			}

			// Not needed: HereDoc.State == 2
			// Indentable here docs: look backwards
			// Non-indentable: look forwards, like in Perl
			//
			// Why: so we can quickly resolve things like <<-" abc"

			else if (!HereDoc.CanBeIndented) {
				if (IsEOLChar(chPrev) && styler.Match(i, HereDoc.Delimiter)) {
					styler.ColorTo(i, state);
					i += HereDoc.DelimiterLength - 1;
					chNext = styler.SafeGetCharAt(i + 1);
					if (IsEOLChar(chNext)) {
						styler.ColorTo(i + 1, SCE_RB_HERE_DELIM);
						state = SCE_RB_DEFAULT;
						HereDoc.State = 0;
						preferRE = false;
					}
					// Otherwise we skipped through the here doc faster.
				}
			} else if (IsEOLChar(chNext)
				&& lookingAtHereDocDelim(styler, i + 1 - HereDoc.DelimiterLength, HereDoc.Delimiter)) {
				styler.ColorTo(i + 1 - HereDoc.DelimiterLength, state);
				styler.ColorTo(i + 1, SCE_RB_HERE_DELIM);
				state = SCE_RB_DEFAULT;
				preferRE = false;
				HereDoc.State = 0;
			}
		} else if (state == SCE_RB_CLASS_VAR
				|| state == SCE_RB_INSTANCE_VAR
				|| state == SCE_RB_SYMBOL) {
			if (state == SCE_RB_SYMBOL &&
					// FID suffixes '?' and '!'
					(((ch == '!' || ch == '?') && chNext != '=') ||
					 // identifier suffix '='
					 (ch == '=' && (chNext != '~' && chNext != '>' &&
									(chNext != '=' || chNext2 == '>'))))) {
				styler.ColorTo(i + 1, state);
				state = SCE_RB_DEFAULT;
				preferRE = false;
			} else if (!isSafeWordcharOrHigh(ch)) {
				styler.ColorTo(i, state);
				redo_char(i, ch, chNext, chNext2, state); // pass by ref
				preferRE = false;
			}
		} else if (state == SCE_RB_GLOBAL) {
			if (!isSafeWordcharOrHigh(ch)) {
				// handle special globals here as well
				if (chPrev == '$') {
					if (ch == '-') {
						// Include the next char, like $-a
						advance_char(i, ch, chNext, chNext2);
					}
					styler.ColorTo(i + 1, state);
					state = SCE_RB_DEFAULT;
				} else {
					styler.ColorTo(i, state);
					redo_char(i, ch, chNext, chNext2, state); // pass by ref
				}
				preferRE = false;
			}
		} else if (state == SCE_RB_POD) {
			// PODs end with ^=end\s, -- any whitespace can follow =end
			if (IsASpace(ch)
				&& i > 5
				&& IsEOLChar(styler[i - 5])
				&& styler.Match(i - 4, "=end")) {
				styler.ColorTo(i, state);
				state = SCE_RB_DEFAULT;
				preferRE = false;
			}
		} else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
			if (ch == '\\' && Quote.Up != '\\') {
				// Skip one
				advance_char(i, ch, chNext, chNext2);
			} else if (ch == Quote.Down) {
				Quote.Count--;
				if (Quote.Count == 0) {
					// Include the options
					while (isSafeAlpha(chNext)) {
						i++;
						ch = chNext;
						chNext = styler.SafeGetCharAt(i + 1);
					}
					styler.ColorTo(i + 1, state);
					state = SCE_RB_DEFAULT;
					preferRE = false;
				}
			} else if (ch == Quote.Up) {
				// Only if close quoter != open quoter
				Quote.Count++;
			} else if (ch == '#') {
				if (chNext == '{') {
					if (innerExpr.canEnter()) {
						// process #{ ... }
						styler.ColorTo(i, state);
						styler.ColorTo(i + 2, SCE_RB_OPERATOR);
						innerExpr.enter(state, Quote);
						preferRE = true;
						// Skip one
						advance_char(i, ch, chNext, chNext2);
					}
				} else if (chNext == '@' || chNext == '$') {
					InterpolateVariable(styler, state, i, ch, chNext, chNext2);
				} else {
					//todo: distinguish comments from pound chars
					// for now, handle as comment
					styler.ColorTo(i, state);
					bool inEscape = false;
					while (++i < lengthDoc) {
						ch = styler.SafeGetCharAt(i);
						if (ch == '\\') {
							inEscape = true;
						} else if (IsEOLChar(ch)) {
							// Comment inside a regex
							styler.ColorTo(i, SCE_RB_COMMENTLINE);
							break;
						} else if (inEscape) {
							inEscape = false;  // don't look at char
						} else if (ch == Quote.Down) {
							// Have the regular handler deal with this
							// to get trailing modifiers.
							i--;
							ch = styler[i];
							break;
						}
					}
					chNext = styler.SafeGetCharAt(i + 1);
				}
			}
			// Quotes of all kinds...
		} else if (IsPercentLiteral(state) ||
				state == SCE_RB_STRING_DQ || state == SCE_RB_STRING_SQ ||
				state == SCE_RB_BACKTICKS) {
			if (!Quote.Down && !isspacechar(ch)) {
				Quote.Open(ch);
			} else if (ch == '\\' && Quote.Up != '\\') {
				//Riddle me this: Is it safe to skip *every* escaped char?
				advance_char(i, ch, chNext, chNext2);
			} else if (ch == Quote.Down) {
				Quote.Count--;
				if (Quote.Count == 0) {
					styler.ColorTo(i + 1, state);
					state = SCE_RB_DEFAULT;
					preferRE = false;
				}
			} else if (ch == Quote.Up) {
				Quote.Count++;
			} else if (ch == '#' && IsInterpolableLiteral(state)) {
				if (chNext == '{') {
					if (innerExpr.canEnter()) {
						// process #{ ... }
						styler.ColorTo(i, state);
						styler.ColorTo(i + 2, SCE_RB_OPERATOR);
						innerExpr.enter(state, Quote);
						preferRE = true;
						// Skip one
						advance_char(i, ch, chNext, chNext2);
					}
				} else if (chNext == '@' || chNext == '$') {
					InterpolateVariable(styler, state, i, ch, chNext, chNext2);
				}
			}
		}

		if (state == SCE_RB_ERROR) {
			break;
		}
		chPrev = ch;
	}
	if (state == SCE_RB_WORD) {
		// We've ended on a word, possibly at EOF, and need to
		// classify it.
		ClassifyWordRb(styler.GetStartSegment(), lengthDoc, '\0', '\0', keywordLists, styler, prevWord);
	} else {
		styler.ColorTo(lengthDoc, state);
	}
}

// Helper functions for folding and for disambiguating keywords
// Assert that there are no high-bit chars

// Copy the run of characters styled `word_state` that ends at `pos` into
// `prevWord` as a NUL-terminated string.
//
//   pos        - position of the last character of the word; it is always
//                copied, whatever its style.
//   prevWord   - destination buffer; must hold MAX_KEYWORD_LENGTH + 1 chars
//                (the size used by the callers in this file).
//   styler     - document accessor; flushed before styles are read
//                (presumably so StyleAt() reflects styles set so far).
//   word_state - style value marking characters that belong to the word.
//
// At most MAX_KEYWORD_LENGTH characters are copied; for a longer run only the
// trailing MAX_KEYWORD_LENGTH characters are kept, so the terminator always
// fits in the buffer.
void getPrevWord(Sci_Position pos, char *prevWord, LexAccessor &styler, int word_state) {
	styler.Flush();
	// Walk left while the preceding character still carries the word style.
	// Position 0 is examined as well, so a word starting at position 1 does
	// not pick up the unrelated first character of the document.
	Sci_Position start = pos;
	while (start > 0 && styler.StyleAt(start - 1) == word_state) {
		--start;
	}
	// Clamp so that [start, pos] spans no more than MAX_KEYWORD_LENGTH chars.
	if (pos - start >= MAX_KEYWORD_LENGTH) {
		start = pos - MAX_KEYWORD_LENGTH + 1;
	}
	char *dst = prevWord;
	for (Sci_Position i = start; i <= pos; i++) {
		*dst++ = styler[i];
	}
	*dst = 0;
}

// Returns true when prevWord is one of "if", "do", "while", "unless" or
// "until" -- keywords that need a closer look before deciding how they
// affect the surrounding code.
bool keywordIsAmbiguous(const char *prevWord) noexcept {
	// Candidates are tried from most to least commonly used; Ruby offers
	// plenty of looping constructs other than 'while'/'until'.
	if (StrEqualsAny(prevWord, "if", "do")) {
		return true;
	}
	return StrEqualsAny(prevWord, "while", "unless", "until");
}

// Demote keywords in the following conditions:
// if, while, unless, until when they modify a statement
// do after a while, until or for, as a noise word (like then after if)

bool keywordIsModifier(const char *word, Sci_Position pos, LexAccessor &styler) {
	if (StrEqual(word, "do")) {
		return keywordDoStartsLoop(pos, styler);
	}

	int style = SCE_RB_DEFAULT;
	Sci_Line lineStart = styler.GetLine(pos);
	Sci_Position lineStartPosn = styler.LineStart(lineStart);
	// We want to step backwards until we don't care about the current
	// position. But first move lineStartPosn back behind any
	// continuations immediately above word.
	while (lineStartPosn > 0) {
		const char ch = styler[lineStartPosn - 1];
		if (ch == '\n' || ch == '\r') {
			const char chPrev = styler.SafeGetCharAt(lineStartPosn - 2);
			const char chPrev2 = styler.SafeGetCharAt(lineStartPosn - 3);
			// If we find a continuation line, include it in our analysis.
			if (chPrev == '\\' || (ch == '\n' && chPrev == '\r' && chPrev2 == '\\')) {
				lineStart = styler.GetLine(lineStartPosn - 1);
				lineStartPosn = styler.LineStart(lineStart);
			} else {
				break;
			}
		} else {
			break;
		}
	}

	styler.Flush();
	while (--pos >= lineStartPosn) {
		style = styler.StyleAt(pos);
		if (style == SCE_RB_DEFAULT) {
			const char ch = styler[pos];
			if (IsASpaceOrTab(ch)) {
				//continue
			} else if (ch == '\r' || ch == '\n') {
				// Scintilla's LineStart() and GetLine() routines aren't
				// platform-independent, so if we have text prepared with
				// a different system we can't rely on it.

				// Also, lineStartPosn may have been moved to more than one
				// line above word's line while pushing past continuations.
				const char chPrev = styler.SafeGetCharAt(pos - 1);
				const char chPrev2 = styler.SafeGetCharAt(pos - 2);
				if (chPrev == '\\') {
					pos -= 1;	 // gloss over the "\\"
					//continue
				} else if (ch == '\n' && chPrev == '\r' && chPrev2 == '\\') {
					pos -= 2;	 // gloss over the "\\\r"
					//continue
				} else {
					return false;
				}
			}
		} else {
			break;
		}
	}
	if (pos < lineStartPosn) {
		return false;
	}
	// First things where the action is unambiguous
	switch (style) {
	case SCE_RB_DEFAULT:
	case SCE_RB_COMMENTLINE:
	case SCE_RB_POD:
	case SCE_RB_CLASS_NAME:
	case SCE_RB_DEF_NAME:
	case SCE_RB_MODULE_NAME:
		return false;
	case SCE_RB_OPERATOR:
		break;
	case SCE_RB_WORD:
		// Watch out for uses of 'else if'
		//XXX: Make a list of other keywords where 'if' isn't a modifier
		//	   and can appear legitimately
		// Formulate this to avoid warnings from most compilers
		if (StrEqual(word, "if")) {
			char prevWord[MAX_KEYWORD_LENGTH + 1];
			getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
			return !StrEqual(prevWord, "else");
		}
		return true;
	default:
		return true;
	}
	// Assume that if the keyword follows an operator,
	// usually it's a block assignment, like
	// a << if x then y else z

	const char ch = styler[pos];
	return AnyOf(ch, ')', ']', '}');
}

#define WHILE_BACKWARDS "elihw"
#define UNTIL_BACKWARDS "litnu"
#define FOR_BACKWARDS "rof"

// Nothing fancy -- report whether a 'while', 'until' or 'for' keyword
// appears somewhere to the left of `pos` on the current line.
//
//   pos    - position of the 'do' keyword being examined
//   styler - accessor used to read characters and previously set styles

bool keywordDoStartsLoop(Sci_Position pos, LexAccessor &styler) {
	const Sci_Position lineBegin = styler.LineStart(styler.GetLine(pos));
	styler.Flush();
	for (--pos; pos >= lineBegin; --pos) {
		const int styleHere = styler.StyleAt(pos);
		if (styleHere == SCE_RB_DEFAULT) {
			const char c = styler[pos];
			if (c == '\r' || c == '\n') {
				// Scintilla's LineStart() and GetLine() routines aren't
				// platform-independent, so text prepared on a different
				// system may contain a line end inside the "line".
				return false;
			}
			continue;
		}
		if (styleHere != SCE_RB_WORD) {
			continue;
		}

		// Gather the keyword while walking right-to-left, which stores it
		// reversed; it is compared against the reversed spellings above.
		char reversed[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
		int stored = 0;
		Sci_Position cursor = pos;
		while (cursor >= lineBegin && styler.StyleAt(cursor) == SCE_RB_WORD) {
			// Overlong runs are truncated after MAX_KEYWORD_LENGTH - 1
			// characters, but the walk still covers the whole run.
			if (stored < MAX_KEYWORD_LENGTH - 1) {
				reversed[stored++] = styler[cursor];
			}
			--cursor;
		}
		reversed[stored] = 0;

		if (StrEqualsAny(reversed, WHILE_BACKWARDS, UNTIL_BACKWARDS, FOR_BACKWARDS)) {
			return true;
		}
		// Resume from where the walk over the keyword stopped; the loop's
		// own decrement then moves one position further left. That extra
		// step is accepted because two keywords are never contiguous:
		// word1 word2
		//      ^        cursor after the walk over word2
		//     ^         position examined next
		pos = cursor;
	}
	return false;
}

/*
 *  Folding Ruby
 *
 *  The language is quite complex to analyze without a full parse.
 *  For example, this line shouldn't affect fold level:
 *
 *   print "hello" if feeling_friendly?
 *
 *  Neither should this:
 *
 *   print "hello" \
 *      if feeling_friendly?
 *
 *
 *  But this should:
 *
 *   if feeling_friendly?  #++
 *     print "hello" \
 *     print "goodbye"
 *   end                   #--
 *
 *  So we cheat, by actually looking at the existing indentation
 *  levels for each line, and just echoing it back.  Like Python.
 *  Then if we get better at it, we'll take braces into consideration,
 *  which always affect folding levels.

 *  How the keywords should work:
 *  No effect:
 *  __FILE__ __LINE__ BEGIN END alias and
 *  defined? false in nil not or self super then
 *  true undef

 *  Always increment:
 *  begin  class def do for module when {
 *
 *  Always decrement:
 *  end }
 *
 *  Increment if these start a statement
 *  if unless until while -- do nothing if they're modifiers

 *  These end a block if there's no modifier, but don't bother
 *  break next redo retry return yield
 *
 *  These temporarily de-indent, but re-indent
 *  case else elsif ensure rescue
 *
 *  This means that the folder reflects indentation rather
 *  than setting it.  The language-service updates indentation
 *  when users type return and finishes entering de-denters.
 *
 *  Later offer to fold POD, here-docs, strings, and blocks of comments
 */

// Shorthand for use inside FoldRbDoc: forwards to IsLexCommentLine() with the
// SCE_RB_COMMENTLINE style and relies on a variable named `styler` being in scope.
#define IsCommentLine(line)	IsLexCommentLine(styler, line, SCE_RB_COMMENTLINE)

// Folder for Ruby: assigns a fold level to every line that ends inside
// [startPos, startPos + length), based on the styles already stored in the document.
//
//   startPos     - first position to process
//   length       - number of positions to process
//   initStyle    - only forwarded to synchronizeDocStart()
//   keywordLists - the KeywordIndex_CodeFolding list supplies the fold-opening keywords
//   styler       - accessor used to read characters/styles/levels and to store levels
//
// The running level is raised or lowered by:
//   - "=begin" / "=end" inside SCE_RB_POD text
//   - opening / closing brackets styled SCE_RB_OPERATOR
//   - keywords: "end" lowers; "def" and words in the code-folding list raise
//   - here-doc delimiters (SCE_RB_HERE_DELIM)
//   - the first and last character of SCE_RB_STRING_QW / SCE_RB_STRING_W runs
// A small per-line state machine additionally detects endless method
// definitions (def name(args) = expr) and takes back the level added for "def".
void FoldRbDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, LexerWordList keywordLists, Accessor &styler) {
	// NOTE(review): synchronizeDocStart() is defined outside this view; presumably it
	// adjusts the start to a safe restart point -- confirm against its definition.
	synchronizeDocStart(startPos, length, initStyle, styler, false);
	const Sci_PositionU endPos = startPos + length;
	Sci_Line lineCurrent = styler.GetLine(startPos);
	// Level in effect at the start of the current line
	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
	// Running level, updated as characters are consumed
	int levelCurrent = levelPrev;
	uint8_t chPrev = '\0';
	uint8_t chNext = styler[startPos];
	int styleNext = styler.StyleAt(startPos);
	int stylePrev = styler.StyleAt(startPos - 1);
	// Position at which the current line is considered finished (capped at endPos)
	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
	lineStartNext = sci::min(lineStartNext, endPos);
	// detect endless method definition to fix up code folding
	enum class MethodDefinition {
		None,		// not inside a "def" header
		Define,		// just saw the "def" keyword
		Operator,	// method name begins with an operator-styled character
		Name,		// method name has a name/identifier style
		Argument,	// past the name; scanning the argument list for a following '='
	};
	MethodDefinition method_definition = MethodDefinition::None;
	int argument_paren_count = 0;
	// Set once a here-doc opener has been counted on the current line
	bool heredocOpen = false;

	while (startPos < endPos) {
		// ch/style describe the character being processed; startPos is advanced
		// first, so inside the body it already refers to the following character.
		const uint8_t ch = chNext;
		chNext = styler[++startPos];
		const int style = styleNext;
		styleNext = styler.StyleAt(startPos);

		if (style == SCE_RB_POD) {
			// '=' directly followed by "begin" opens a fold, by "end" closes one
			if (ch == '=') {
				if (styler.Match(startPos, "begin")) {
					levelCurrent++;
				} else if (styler.Match(startPos, "end")) {
					levelCurrent--;
				}
			}
		} else if (style == SCE_RB_OPERATOR) {
			if (ch == '(' || ch == '{' || ch == '[') {
				levelCurrent++;
			} else if (ch == ')' || ch == '}' || ch == ']') {
				levelCurrent--;
			}
		} else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
			// Last character of a keyword:
			// Look at the keyword on the left and decide what to do
			char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
			prevWord[0] = 0;
			getPrevWord(startPos - 1, prevWord, styler, SCE_RB_WORD);
			if (StrEqual(prevWord, "end")) {
				levelCurrent--;
			} else if (StrEqual(prevWord, "def")) {
				levelCurrent++;
				// Start watching for an endless method definition
				method_definition = MethodDefinition::Define;
			} else if (keywordLists[KeywordIndex_CodeFolding].InList(prevWord)) {
				levelCurrent++;
			}
		} else if (style == SCE_RB_HERE_DELIM && !heredocOpen) {
			if (stylePrev == SCE_RB_OPERATOR && chPrev == '<' && styler.SafeGetCharAt(startPos - 3) == '<') {
				// Delimiter text directly after an operator-styled "<<": here-doc opener
				levelCurrent++;
				heredocOpen = true;
			} else if (styleNext != SCE_RB_HERE_DELIM) {
				// Last character of any other delimiter run: here-doc terminator
				levelCurrent--;
			}
		} else if (style == SCE_RB_STRING_QW || style == SCE_RB_STRING_W) {
			// Raise on the first character of the literal, lower on the last one
			if (stylePrev != style) {
				levelCurrent++;
			}
			if (styleNext != style) {
				levelCurrent--;
			}
		}
		if (method_definition != MethodDefinition::None) {
			switch (method_definition) {
			case MethodDefinition::Define:
				// Classify what follows "def"; anything other than more keyword
				// characters or blanks that is not a name/operator cancels tracking.
				if (style == SCE_RB_OPERATOR) {
					method_definition = MethodDefinition::Operator;
				} else if (style == SCE_RB_DEF_NAME || style == SCE_RB_WORD_DEMOTED || IsIdentifierStyle(style)) {
					method_definition = MethodDefinition::Name;
				} else if (!(style == SCE_RB_WORD || IsASpaceOrTab(ch))) {
					method_definition = MethodDefinition::None;
				}
				if (method_definition <= MethodDefinition::Define) {
					break;
				}
				// fall through for unary operator or single letter name
				[[fallthrough]];
			case MethodDefinition::Operator:
			case MethodDefinition::Name:
				// Look at the character after the current one to find the end of the name
				if (IsEOLChar(chNext) || chNext == '#') {
					// Line end or comment right after the name: not an endless definition
					method_definition = MethodDefinition::None;
				} else if (chNext == '(' || chNext <= ' ') {
					// setter method cannot be defined in an endless method definition.
					if (ch == '=' && (method_definition == MethodDefinition::Name || chPrev == ']')) {
						method_definition = MethodDefinition::None;
					} else {
						method_definition = MethodDefinition::Argument;
						argument_paren_count = 0;
					}
				}
				break;
			case MethodDefinition::Argument:
				if (style == SCE_RB_OPERATOR) {
					if (ch == '(') {
						++argument_paren_count;
					} else if (ch == ')') {
						--argument_paren_count;
					} else if (argument_paren_count == 0) {
						// First operator outside the parentheses ends tracking; if it is
						// '=', this is an endless definition, so undo the level added for "def".
						method_definition = MethodDefinition::None;
						if (ch == '=' && levelCurrent > 0) {
							levelCurrent--;
						}
					}
				} else if (argument_paren_count == 0 && !IsASpaceOrTab(ch)) {
					// '=' must be first character after method name or right parenthesis
					method_definition = MethodDefinition::None;
				}
				break;
			default:
				break;
			}
		}

		chPrev = ch;
		stylePrev = style;
		if (startPos == lineStartNext) {
			// End of the current line (or of the range): store its level.
			// Never let the level drop below the base level.
			levelCurrent = sci::max(levelCurrent, SC_FOLDLEVELBASE);
			if (IsCommentLine(lineCurrent)) {
				// For a comment line, add the next line's comment-line test result and
				// subtract the previous line's (assumes IsLexCommentLine yields 0 or 1).
				levelCurrent += IsCommentLine(lineCurrent + 1) - IsCommentLine(lineCurrent - 1);
			}

			// The line is stored with the level it started at; it becomes a
			// fold header when the level rose while processing it.
			int lev = levelPrev;
			if ((levelCurrent > levelPrev))
				lev |= SC_FOLDLEVELHEADERFLAG;
			styler.SetLevel(lineCurrent, lev);
			lineCurrent++;
			lineStartNext = styler.LineStart(lineCurrent + 1);
			lineStartNext = sci::min(lineStartNext, endPos);
			levelPrev = levelCurrent;
			// Per-line state does not carry over to the next line
			method_definition = MethodDefinition::None;
			argument_paren_count = 0;
			heredocOpen = false;
		}
	}
}

}

// Lexer module object for Ruby: constructed with the SCLEX_RUBY identifier, the
// colouriser ColouriseRbDoc, the language name "ruby" and the folder FoldRbDoc.
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc);
